### DiD analysis, viewing split by cable versus network news

library(tidyverse)
library(magrittr)
library(data.table)
library(lubridate)
library(hms)
library(ggthemes)
library(stargazer)
theme_set(theme_few())

### SET WORKING DIRECTORY HERE ###
path_to_archive <- "replication/"
data_dir <- paste0(path_to_archive, "data/")
setwd(data_dir)
plot_dir <- paste0(path_to_archive, "plots/")
tables_dir <- paste0(path_to_archive, "tables/")

### FUNCTION DEFINITIONS FOR WEIGHTING SCHEMES
diff2 <- function(x) x[seq(1,length(x)-1,2)] - x[seq(2,length(x),2)]

inv_var_weight <- function(i,d) {
	weighted.mean(d[i, est], w=sqrt(d[i, n_t] * d[i, n_c]), na.rm=T)
}

total_size_weight <- function(i, d) {
	weighted.mean(d[i,est], w=d[i,n_t] + d[i,n_c], na.rm=T)
}

simple_avg <- function(i,d){
	mean(d[i,est], na.rm=T)
}

bootstrap_channel <- function(netw, f) {
	map_dbl(1:1000, ~ f(sample.int(nrow(dd[cable_network==netw]), replace=T), dd[cable_network==netw]))
}

dd_wide <- readRDS(paste0(data_dir, "all_dd_data_cable_network.rds"))
s_cols <- grep("s_p", colnames(dd_wide), value=T)


dd_wide <- dd_wide[,group:=ifelse(group=="T","T","C")] %>% 
	.[,map(.SD, sum), .SDcols=s_cols, by = .(dma_code, channel, affiliate, program, ad_time_utc, group, cable_network, n_t, n_c1, n_c2, n_c12)] %>%
 	.[,n_c := n_c1 + n_c2 + n_c12]

dd_long <- dd_wide %>% 
	gather(key="hour", val="view_sum", dplyr::starts_with("s_pre"), dplyr::starts_with("s_post")) %>%
	as.data.table

dd_long[, `:=`(view_mean = view_sum / ifelse(group=="C", n_c, n_t),
			   pre_post = sub("s_(pre|post)_(\\d+)", "\\1", hour),
			   hour = sub("s_(pre|post)_(\\d+)", "\\2", hour) %>% as.numeric)]

dd <- 
	dd_long[, .(news_min = sum(view_mean)/60), by = .(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c, cable_network, group, pre_post)]

dd %<>% setkey(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c, cable_network, group, pre_post) %>%
	.[, .(est = -diff2(diff2(.SD$news_min))) ,by = .(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c, cable_network)]

# full sample point estimate, bootstrap
chans <- c("CNN","CSPAN","FNC","HLN","MSNBC","OTHER","NETWORK")
pt_ests <- chans %>% map(~ inv_var_weight(1:nrow(dd[cable_network==.]),dd[cable_network==.])) %>% set_names(chans)
bs <- chans %>% map(~ bootstrap_channel(., inv_var_weight)) %>% set_names(chans)
results <- map2(pt_ests, bs, c) %>% as.data.table %>% .[,replicate:=0:1000]


### confidence intervals ###
quantiles <- c(0.01,0.025,0.975,0.99)

cis <- results[2:1001, map(.SD, quantile, probs=quantiles)] %>% 
			 .[,`:=` (quantile = quantiles, replicate = NULL)]


### TABLE 3(A)

y <- rnorm(100)
x <- matrix(rnorm(100), nrow=100)

fake_model <- lm(y ~ x - 1)
dd_bootstrap_cis <- stargazer(map(1:length(chans), ~ fake_model) %>% set_names(chans),
		  title="Average Effects and Bootstrapped Confidence Intervals, by Channel.",
		  style="apsr",
		  out=paste0(tables_dir, "dd_bootstrap_cis_cable_network.tex"),
		  label="tab:dd_bootstrap_cis_cable_network",
		  coef=results[1,1:length(chans)] %>% as.list, 
		  ci=T,
		  ci.custom=cis[quantile %in% c(0.025, 0.975),1:length(chans)] %>% transpose %>% split(f=1:length(chans)) %>% map(as.matrix) %>% set_names(chans),
		  star.cutoffs = NA,
		  omit.stat = "all",
		  column.labels = chans,
		  dep.var.labels = "Effect (Minutes)",
		  covariate.labels = "Treated",
		  notes="\\parbox[t]{0.7\\linewidth}{Table reports average effects of news viewership on the indicated channel, weighted by the geometric mean of treatment and control group size. OTHER is all other national cable networks, and NETWORK is all local network affiliates. Confidence intervals are the central 95\\% interval of 1000 bootstrap replicates within each subgroup.}",
		  notes.append=F
		  )








